In [ ]:
    
import os
import cPickle as pickle
# Combine the per-file pickled dictionaries found in MATCHED_DIR.
# One set of files has the trial and investigator matches to publications,
# and the second has a lookup dict for the trial information.
MATCHED_DIR = 'data/matched_trial_article/'


def _merge_pickled_dicts(filenames, out_path, directory=MATCHED_DIR):
    """Merge several pickled dicts into one dict and pickle the result.

    Args:
        filenames: names of pickle files inside `directory`; each one is
            passed to dict.update after loading, so it must unpickle to a
            dict (or an iterable of key/value pairs).
        out_path: path the merged dict is pickled to.
        directory: folder that contains `filenames`.

    Files are merged in the order given; on duplicate keys the later file
    wins. Each file name is printed as it is processed.
    """
    merged = {}
    for name in filenames:
        print(name)
        # NOTE(review): unpickling can execute arbitrary code -- only run this
        # on files from a trusted source.
        with open(os.path.join(directory, name), 'rb') as src:
            merged.update(pickle.load(src))
    # the context manager guarantees the output is flushed and closed
    with open(out_path, 'wb') as dst:
        pickle.dump(merged, dst)


# os.listdir returns entries in arbitrary order; sorting makes the merge order
# (and therefore which file wins on a duplicate key) reproducible
data_files = sorted(os.listdir(MATCHED_DIR))
data_list_trial_match = [f for f in data_files if 'trial_match_' in f]
data_list_trials = [f for f in data_files if 'trials_' in f]

# join and save the trial match data
_merge_pickled_dicts(data_list_trial_match, 'data/trial_invest_pub_match_dict.pkl')
# join and save the trial lookup data
_merge_pickled_dicts(data_list_trials, 'data/pub_lookup_dict.pkl')
    
In [1]:
    
import cPickle as pickle
    
In [3]:
    
# Load the pickled investigator dictionary; the with-block closes the file
# handle as soon as loading finishes.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open('data/investigator_dict.pkl', 'rb') as fh:
    investiagtor_dict = pickle.load(fh)
    
In [9]:
    
# Invert investiagtor_dict: every value listed under an entry's 'id' field
# becomes a key that points back to that entry's own key.
id_investigator_lookup = {}
for investigator_key, record in investiagtor_dict.items():
    # if one id is listed under several investigators, the last one visited wins
    id_investigator_lookup.update(
        (single_id, investigator_key) for single_id in record['id']
    )
    
In [18]:
    
# Save the id -> investigator lookup; the with-block makes sure the pickle is
# flushed to disk and the handle is closed.
with open('data/id_investigator_lookup.pkl', 'wb') as fh:
    pickle.dump(id_investigator_lookup, fh)
    
In [15]:
    
# Preview the first ten (id, investigator) pairs. Wrapping in list() keeps the
# slice valid even where dict.items() returns a non-sliceable view object.
list(id_investigator_lookup.items())[:10]
    
    Out[15]:
In [ ]: